Searches for the matrix with the highest probability of producing a CGCGCG primer (there is only one solution). Minimizes 1 - p(CGCGCG).
# Load the performance table of the test run (no header row; columns are
# named 'it' and 'score') and preview its first rows.
test.perf <- read.csv(
  file = '~/mnt/edann/hexamers/strand_specific/test_function.performance.csv',
  header = FALSE,
  col.names = c('it', 'score')
)
head(test.perf)
Functions for reshaping and plotting are in DE_performance.r
# `iter` is stored as text (the maximum prints as a quoted string), so a plain
# max() compares lexicographically and would rank "999" above "1000".
# Convert to numbers before taking the maximum.
max(as.numeric(as.character(long.mat$iter)))
[1] "999"
# Build one plot per iteration for iterations 1 to 3, then draw them in order.
l <- lapply(1:3, function(iter) plot.iteration(long.mat, iter))
# png('~/AvOwork/output/DE_optimization/test_iter_matrix.png')
for (el in l) {
  # The commented png()/dev.off() pair would write each plot to its own file.
  # png(paste0("~/AvOwork/output/DE_optimization/DE_iter-", el$data$iter[1], ".png"))
  plot(el)
  # dev.off()
}
With a bigger population size I get better scores, and if the algorithm gets stuck in a local minimum, it happens after more iterations.
# Collect the *.DE.rho.txt files of the 300-iteration runs, one vector per
# population size. list.files() interprets `pattern` as a regular expression,
# so the literal dots are escaped and the match is anchored to the end of the
# file name.
rho.files.10 <- list.files(opt.dir, pattern = '_pop10_its300\\.DE\\.rho\\.txt$', full.names = TRUE)
rho.files.20 <- list.files(opt.dir, pattern = '_pop20_its300\\.DE\\.rho\\.txt$', full.names = TRUE)
rho.files.30 <- list.files(opt.dir, pattern = '_pop30_its300\\.DE\\.rho\\.txt$', full.names = TRUE)
# Score plot for the population-size-10 runs.
plot.optimization.score(rho.files.10) +
  ggtitle('Pop.size=10')
Read 300 items
Read 300 items
Read 300 items
Read 300 items
Read 300 items
Read 300 items
Read 300 items
Read 300 items
Read 300 items
Read 300 items
# Score plot for the files in rho.files.20, titled with the population size.
plot.optimization.score(rho.files.20) +
ggtitle('Pop.size=20')
Read 300 items
Read 300 items
Read 300 items
Read 300 items
Read 300 items
Read 300 items
Read 300 items
Read 300 items
Read 300 items
Read 300 items
# Score plot for the files in rho.files.30, titled with the population size.
plot.optimization.score(rho.files.30) +
ggtitle('Pop.size=30')
Read 300 items
Read 300 items
Read 300 items
Read 300 items
Read 300 items
Read 300 items
Read 300 items
Read 300 items
Read 300 items
Read 300 items
# Score plot for the CTCF runs; score.ctcf.files is defined outside this section.
plot.optimization.score(score.ctcf.files)
Read 100 items
Read 100 items
Read 100 items
Read 100 items
Read 100 items
Read 100 items
Read 100 items
Read 100 items
Read 100 items
Read 100 items
# Read the fourth matrix file (first column becomes the row names) and pass it
# to reshape.prob.mat(), a helper defined outside this section.
performance.mat <- read.csv(matrix.ctcf.files[4], row.names = 1)
long.mat <- reshape.prob.mat(performance.mat)
Using iter as id variables
# Preview the first rows and count the rows of the table.
head(performance.mat)
nrow(performance.mat)
[1] 100
# For every matrix file, plot iteration 99 and title the plot with the file name.
# basename() is used for the title: the previous '.+//' pattern only stripped the
# directory when the path happened to contain a double slash, and otherwise left
# the full path in the title.
lapply(matrix.ctcf.files, function(mat) {
  long <- reshape.prob.mat(read.csv(mat, row.names = 1))
  plot.iteration(long, 99) +
    ggtitle(basename(mat))
})
Using iter as id variables
Using iter as id variables
Using iter as id variables
Using iter as id variables
Using iter as id variables
Using iter as id variables
Using iter as id variables
Using iter as id variables
Using iter as id variables
Using iter as id variables
[[1]]
[[2]]
[[3]]
[[4]]
[[5]]
[[6]]
[[7]]
[[8]]
[[9]]
[[10]]
Test4 is so weird! Best score and ends with all Ts!!
# Reload the fifth matrix file and plot a spread of iterations between 1 and 99.
# NOTE(review): presumably index 5 is the "test4" run (list.files() sorts
# "test10" before "test2"); confirm against matrix.ctcf.files.
performance.mat <- read.csv(file = matrix.ctcf.files[5], row.names = 1)
long.mat <- reshape.prob.mat(performance.mat)
lapply(
  c(1, 10, 20, 50, 70, 99),
  function(iter) plot.iteration(long.mat, iter)
)
# Read the BigWig file. The call is namespace-qualified because a bare import()
# fails with 'could not find function "import"' when rtracklayer is not attached.
bestmat.bw <- rtracklayer::import(bestmat.bw.file, format = 'BigWig')
Error in import(bestmat.bw.file, format = "BigWig") :
could not find function "import"
# Profile plot of not.norm.bestmat.df (data frame defined outside this section).
plot.refpoint.profile.df(not.norm.bestmat.df)
[1] 0 300 600
# Profile plot of norm.bestmat.df (data frame defined outside this section).
plot.refpoint.profile.df(norm.bestmat.df)
[1] 0 300 600
Not total abundance
# Score plot for score.ctcf.files with the legend title removed.
plot.optimization.score(score.ctcf.files) +
theme(legend.title = element_blank())
Read 100 items
Read 100 items
Read 100 items
Read 100 items
Read 100 items
Read 100 items
Read 100 items
Read 100 items
Read 100 items
Read 100 items
# Score plot for score.ctcf.files with the legend title removed.
# NOTE(review): score.ctcf.files is presumably reassigned between these repeated
# calls (the number of files read differs); confirm in the full notebook.
plot.optimization.score(score.ctcf.files) +
theme(legend.title = element_blank())
Read 100 items
Read 100 items
Read 100 items
Read 100 items
Read 100 items
Read 100 items
Read 100 items
Read 100 items
# Score plot for score.ctcf.files with the legend title removed.
# NOTE(review): these files hold 200 values each, so presumably this is the
# 200-iteration set; confirm where score.ctcf.files is assigned.
plot.optimization.score(score.ctcf.files) +
theme(legend.title = element_blank())
Read 200 items
Read 200 items
Read 200 items
Read 200 items
Read 200 items
Read 200 items
Read 200 items
Read 200 items
Read 200 items
Read 200 items
Load coverage profiles around CTCF sites
# List the profile files under <opt.dir>predicted_cov_bestmat and load each one
# with load.profile() (defined outside this section). TRUE is spelled out
# because `T` is an ordinary variable that can be reassigned.
prof.files <- list.files(paste0(opt.dir, 'predicted_cov_bestmat'), pattern = 'profile.txt', full.names = TRUE)
profiles <- lapply(prof.files, load.profile)
Read 600 items
Read 600 items
Read 600 items
Read 600 items
Read 600 items
Read 600 items
Read 600 items
# Name each profile after its file, with the leading path/prefix and the
# '.bestMat...profile.txt' suffix stripped.
names(profiles) <- gsub(prof.files, pattern = '.+500reg|.bestMat.+profile.txt', replacement = '')
df <- make.df.of.profiles(profiles)
# Last value of every score file. vapply() guarantees a numeric vector (sapply()
# returns a list for empty input), and rev() replaces the direct call to the
# rev.default method. The result is named by file path.
best.scores <- vapply(score.ctcf.files, function(x) rev(scan(x))[1], numeric(1))
Read 100 items
Read 100 items
Read 200 items
Read 100 items
Read 100 items
Read 200 items
Read 100 items
Read 100 items
Read 200 items
Read 100 items
Read 100 items
Read 200 items
Read 100 items
Read 100 items
Read 200 items
Read 100 items
Read 100 items
Read 200 items
Read 100 items
Read 100 items
Read 200 items
Read 100 items
Read 100 items
Read 200 items
Read 100 items
Read 100 items
Read 200 items
Read 100 items
Read 100 items
Read 200 items
# Reduce each score-file path to its run name: drop the directory and the
# '.DE.<...>.txt' suffix. The dots are escaped and the suffix anchored so that
# only the literal extension is removed.
names(best.scores) <- sub('\\.DE\\..+\\.txt$', '', basename(names(best.scores)))
# Attach the final score of the matching run to every profile row.
df.wScores <- df %>% mutate(score = best.scores[match(sample, names(best.scores))])
# Plot all runs except test1 and test10, coloured by score.
df.wScores %>%
  filter(sample != 'test1_ctcf_kmersFC_pop30_its100',
         sample != "test10_ctcf_kmersFC_pop30_its100") %>%
  plot.refpoint.profile.df(., color = 'score') +
  guides(color = guide_colorbar(barwidth = 20, barheight = 1))
[1] 0 300 600
Not normalized
# Reload the profiles with normalization switched off. FALSE is spelled out
# because `F` is an ordinary variable that can be reassigned.
profiles <- lapply(prof.files, load.profile, normalize = FALSE)
Read 600 items
Read 600 items
Read 600 items
Read 600 items
Read 600 items
Read 600 items
Read 600 items
# Name each profile after its file, with the leading path/prefix and the
# '.bestMat...profile.txt' suffix stripped.
names(profiles) <- gsub(prof.files, pattern = '.+500reg|.bestMat.+profile.txt', replacement = '')
df <- make.df.of.profiles(profiles)
# Last value of every score file. vapply() guarantees a numeric vector (sapply()
# returns a list for empty input), and rev() replaces the direct call to the
# rev.default method. The result is named by file path.
best.scores <- vapply(score.ctcf.files, function(x) rev(scan(x))[1], numeric(1))
Read 100 items
Read 100 items
Read 200 items
Read 100 items
Read 100 items
Read 200 items
Read 100 items
Read 100 items
Read 200 items
Read 100 items
Read 100 items
Read 200 items
Read 100 items
Read 100 items
Read 200 items
Read 100 items
Read 100 items
Read 200 items
Read 100 items
Read 100 items
Read 200 items
Read 100 items
Read 100 items
Read 200 items
Read 100 items
Read 100 items
Read 200 items
Read 100 items
Read 100 items
Read 200 items
# Reduce each score-file path to its run name: drop the directory and the
# '.DE.<...>.txt' suffix. The dots are escaped and the suffix anchored so that
# only the literal extension is removed.
names(best.scores) <- sub('\\.DE\\..+\\.txt$', '', basename(names(best.scores)))
# Attach the final score of the matching run to every profile row.
df.wScores <- df %>% mutate(score = best.scores[match(sample, names(best.scores))])
# Plot all runs except test1 and test10, coloured by score.
df.wScores %>%
  filter(sample != 'test1_ctcf_kmersFC_pop30_its100',
         sample != "test10_ctcf_kmersFC_pop30_its100") %>%
  plot.refpoint.profile.df(., color = 'score') +
  guides(color = guide_colorbar(barwidth = 20, barheight = 1))
[1] 0 300 600
# Coverage track plot for the second element of l.best.bw.
plot.expVSpred.coverage.track(l.best.bw[[2]])
The following non-numeric data column has been dropped: id
[[1]]
Genome axis 'Axis'
[[2]]
DataTrack 'norm. coverage'
| genome: NA
| active chromosome: chr1
| positions: 4999
| samples:1
| strand: *
# Print the region labels held in regionsWctcf (note that some labels repeat).
regionsWctcf
[1] "chr1 (1.82535e+08,1.8269e+08]" "chr10 (8.91e+06,9.78e+06]" "chr11 (7.0175e+07,7.032e+07]" "chr13 (5.801e+07,5.816e+07]"
[5] "chr13 (9.9375e+07,9.9725e+07]" "chr14 (2.1045e+07,2.111e+07]" "chr14 (6.985e+07,6.989e+07]" "chr15 (5.847e+07,5.878e+07]"
[9] "chr17 (2.6425e+07,2.6595e+07]" "chr17 (2.776e+07,2.8335e+07]" "chr17 (2.776e+07,2.8335e+07]" "chr18 (6.0855e+07,6.1005e+07]"
[13] "chr18 (6.4525e+07,6.461e+07]" "chr18 (6.727e+07,6.7355e+07]" "chr19 (3.2715e+07,3.305e+07]" "chr19 (4.366e+07,4.4e+07]"
[17] "chr2 (1.2636e+08,1.26775e+08]" "chr2 (1.5342e+08,1.53505e+08]" "chr4 (9.78e+06,9.88e+06]" "chr6 (3.416e+07,3.426e+07]"
[21] "chr6 (8.3135e+07,8.323e+07]" "chr6 (1.48705e+08,1.49265e+08]" "chr7 (2.524e+07,2.552e+07]" "chr7 (2.8335e+07,2.946e+07]"
[25] "chr7 (2.8335e+07,2.946e+07]" "chr7 (9.7675e+07,9.8015e+07]" "chr7 (1.4244e+08,1.4268e+08]" "chr8 (2.4525e+07,2.481e+07]"
[29] "chr8 (5.366e+07,5.367e+07]" "chr8 (8.4695e+07,8.47e+07]" "chr8 (8.4695e+07,8.47e+07]" "chr8 (1.0857e+08,1.08725e+08]"
[33] "chr9 (1.06345e+08,1.06605e+08]" "chrX (1.53505e+08,1.5367e+08]"
# Plot 10 randomly chosen regions with their CTCF annotation. regionsWctcf holds
# some labels more than once, so the sample is drawn from the unique labels to
# avoid plotting the same region twice. `[[x]]` extracts the element directly.
# No seed is set in this call, so the selection differs between runs.
lapply(sample(unique(regionsWctcf), 10), function(x) plot.cov.wAnnotation(l.best.bw[[x]], ctcf))
The following non-numeric data column has been dropped: id
[[1]]
[[1]]$Axis
Genome axis 'Axis'
[[1]]$`norm. coverage`
DataTrack 'norm. coverage'
| genome: NA
| active chromosome: chr1
| positions: 5000
| samples:1
| strand: *
[[1]]$CTCF
AnnotationTrack 'CTCF'
| genome: NA
| active chromosome: chr1
| annotation features: 1
[[1]]$titles
An object of class "ImageMap"
Slot "coords":
x1 y1 x2 y2
Axis 6 37.24747 38.42335 77.20723
norm. coverage 6 77.20723 38.42335 278.26342
CTCF 6 278.26342 38.42335 318.47466
Slot "tags":
$title
Axis norm. coverage CTCF
"Axis" "norm. coverage" "CTCF"
[[2]]
[[2]]$Axis
Genome axis 'Axis'
[[2]]$`norm. coverage`
DataTrack 'norm. coverage'
| genome: NA
| active chromosome: chr19
| positions: 4999
| samples:1
| strand: *
[[2]]$CTCF
AnnotationTrack 'CTCF'
| genome: NA
| active chromosome: chr19
| annotation features: 1
[[2]]$titles
An object of class "ImageMap"
Slot "coords":
x1 y1 x2 y2
Axis 6 37.24747 38.42335 77.20723
norm. coverage 6 77.20723 38.42335 278.26342
CTCF 6 278.26342 38.42335 318.47466
Slot "tags":
$title
Axis norm. coverage CTCF
"Axis" "norm. coverage" "CTCF"
[[3]]
[[3]]$Axis
Genome axis 'Axis'
[[3]]$`norm. coverage`
DataTrack 'norm. coverage'
| genome: NA
| active chromosome: chr7
| positions: 4999
| samples:1
| strand: *
[[3]]$CTCF
AnnotationTrack 'CTCF'
| genome: NA
| active chromosome: chr7
| annotation features: 2
[[3]]$titles
An object of class "ImageMap"
Slot "coords":
x1 y1 x2 y2
Axis 6 37.24747 38.42335 77.20723
norm. coverage 6 77.20723 38.42335 278.26342
CTCF 6 278.26342 38.42335 318.47466
Slot "tags":
$title
Axis norm. coverage CTCF
"Axis" "norm. coverage" "CTCF"
[[4]]
[[4]]$Axis
Genome axis 'Axis'
[[4]]$`norm. coverage`
DataTrack 'norm. coverage'
| genome: NA
| active chromosome: chr14
| positions: 4999
| samples:1
| strand: *
[[4]]$CTCF
AnnotationTrack 'CTCF'
| genome: NA
| active chromosome: chr14
| annotation features: 1
[[4]]$titles
An object of class "ImageMap"
Slot "coords":
x1 y1 x2 y2
Axis 6 37.24747 38.42335 77.20723
norm. coverage 6 77.20723 38.42335 278.26342
CTCF 6 278.26342 38.42335 318.47466
Slot "tags":
$title
Axis norm. coverage CTCF
"Axis" "norm. coverage" "CTCF"
[[5]]
[[5]]$Axis
Genome axis 'Axis'
[[5]]$`norm. coverage`
DataTrack 'norm. coverage'
| genome: NA
| active chromosome: chr17
| positions: 4999
| samples:1
| strand: *
[[5]]$CTCF
AnnotationTrack 'CTCF'
| genome: NA
| active chromosome: chr17
| annotation features: 2
[[5]]$titles
An object of class "ImageMap"
Slot "coords":
x1 y1 x2 y2
Axis 6 37.24747 38.42335 77.20723
norm. coverage 6 77.20723 38.42335 278.26342
CTCF 6 278.26342 38.42335 318.47466
Slot "tags":
$title
Axis norm. coverage CTCF
"Axis" "norm. coverage" "CTCF"
[[6]]
[[6]]$Axis
Genome axis 'Axis'
[[6]]$`norm. coverage`
DataTrack 'norm. coverage'
| genome: NA
| active chromosome: chr8
| positions: 4999
| samples:1
| strand: *
[[6]]$CTCF
AnnotationTrack 'CTCF'
| genome: NA
| active chromosome: chr8
| annotation features: 2
[[6]]$titles
An object of class "ImageMap"
Slot "coords":
x1 y1 x2 y2
Axis 6 37.24747 38.42335 77.20723
norm. coverage 6 77.20723 38.42335 278.26342
CTCF 6 278.26342 38.42335 318.47466
Slot "tags":
$title
Axis norm. coverage CTCF
"Axis" "norm. coverage" "CTCF"
[[7]]
[[7]]$Axis
Genome axis 'Axis'
[[7]]$`norm. coverage`
DataTrack 'norm. coverage'
| genome: NA
| active chromosome: chr13
| positions: 4999
| samples:1
| strand: *
[[7]]$CTCF
AnnotationTrack 'CTCF'
| genome: NA
| active chromosome: chr13
| annotation features: 1
[[7]]$titles
An object of class "ImageMap"
Slot "coords":
x1 y1 x2 y2
Axis 6 37.24747 38.42335 77.20723
norm. coverage 6 77.20723 38.42335 278.26342
CTCF 6 278.26342 38.42335 318.47466
Slot "tags":
$title
Axis norm. coverage CTCF
"Axis" "norm. coverage" "CTCF"
[[8]]
[[8]]$Axis
Genome axis 'Axis'
[[8]]$`norm. coverage`
DataTrack 'norm. coverage'
| genome: NA
| active chromosome: chr18
| positions: 4999
| samples:1
| strand: *
[[8]]$CTCF
AnnotationTrack 'CTCF'
| genome: NA
| active chromosome: chr18
| annotation features: 1
[[8]]$titles
An object of class "ImageMap"
Slot "coords":
x1 y1 x2 y2
Axis 6 37.24747 38.42335 77.20723
norm. coverage 6 77.20723 38.42335 278.26342
CTCF 6 278.26342 38.42335 318.47466
Slot "tags":
$title
Axis norm. coverage CTCF
"Axis" "norm. coverage" "CTCF"
[[9]]
[[9]]$Axis
Genome axis 'Axis'
[[9]]$`norm. coverage`
DataTrack 'norm. coverage'
| genome: NA
| active chromosome: chr17
| positions: 4999
| samples:1
| strand: *
[[9]]$CTCF
AnnotationTrack 'CTCF'
| genome: NA
| active chromosome: chr17
| annotation features: 2
[[9]]$titles
An object of class "ImageMap"
Slot "coords":
x1 y1 x2 y2
Axis 6 37.24747 38.42335 77.20723
norm. coverage 6 77.20723 38.42335 278.26342
CTCF 6 278.26342 38.42335 318.47466
Slot "tags":
$title
Axis norm. coverage CTCF
"Axis" "norm. coverage" "CTCF"
[[10]]
[[10]]$Axis
Genome axis 'Axis'
[[10]]$`norm. coverage`
DataTrack 'norm. coverage'
| genome: NA
| active chromosome: chr8
| positions: 4999
| samples:1
| strand: *
[[10]]$CTCF
AnnotationTrack 'CTCF'
| genome: NA
| active chromosome: chr8
| annotation features: 2
[[10]]$titles
An object of class "ImageMap"
Slot "coords":
x1 y1 x2 y2
Axis 6 37.24747 38.42335 77.20723
norm. coverage 6 77.20723 38.42335 278.26342
CTCF 6 278.26342 38.42335 318.47466
Slot "tags":
$title
Axis norm. coverage CTCF
"Axis" "norm. coverage" "CTCF"
Compare with the coverage predicted for even primer probabilities
# Coverage track plot for the second element of l.comon.bw.
plot.expVSpred.coverage.track(l.comon.bw[[2]])
The following non-numeric data column has been dropped: id
[[1]]
Genome axis 'Axis'
[[2]]
DataTrack 'norm. coverage'
| genome: NA
| active chromosome: chr1
| positions: 4999
| samples:2
| strand: *
# Plot 10 randomly chosen regions with their CTCF annotation. regionsWctcf holds
# some labels more than once, so the sample is drawn from the unique labels to
# avoid plotting the same region twice. `[[x]]` extracts the element directly.
# No seed is set in this call, so the selection differs between runs.
lapply(sample(unique(regionsWctcf), 10), function(x) plot.cov.wAnnotation(l.comon.bw[[x]], ctcf))
The following non-numeric data column has been dropped: id
[[1]]
[[1]]$Axis
Genome axis 'Axis'
[[1]]$`norm. coverage`
DataTrack 'norm. coverage'
| genome: NA
| active chromosome: chr8
| positions: 4999
| samples:2
| strand: *
[[1]]$CTCF
AnnotationTrack 'CTCF'
| genome: NA
| active chromosome: chr8
| annotation features: 1
[[1]]$titles
An object of class "ImageMap"
Slot "coords":
x1 y1 x2 y2
Axis 6 37.24747 38.42335 77.20723
norm. coverage 6 77.20723 38.42335 278.26342
CTCF 6 278.26342 38.42335 318.47466
Slot "tags":
$title
Axis norm. coverage CTCF
"Axis" "norm. coverage" "CTCF"
[[2]]
[[2]]$Axis
Genome axis 'Axis'
[[2]]$`norm. coverage`
DataTrack 'norm. coverage'
| genome: NA
| active chromosome: chr19
| positions: 4999
| samples:2
| strand: *
[[2]]$CTCF
AnnotationTrack 'CTCF'
| genome: NA
| active chromosome: chr19
| annotation features: 1
[[2]]$titles
An object of class "ImageMap"
Slot "coords":
x1 y1 x2 y2
Axis 6 37.24747 38.42335 77.20723
norm. coverage 6 77.20723 38.42335 278.26342
CTCF 6 278.26342 38.42335 318.47466
Slot "tags":
$title
Axis norm. coverage CTCF
"Axis" "norm. coverage" "CTCF"
[[3]]
[[3]]$Axis
Genome axis 'Axis'
[[3]]$`norm. coverage`
DataTrack 'norm. coverage'
| genome: NA
| active chromosome: chr7
| positions: 4999
| samples:2
| strand: *
[[3]]$CTCF
AnnotationTrack 'CTCF'
| genome: NA
| active chromosome: chr7
| annotation features: 1
[[3]]$titles
An object of class "ImageMap"
Slot "coords":
x1 y1 x2 y2
Axis 6 37.24747 38.42335 77.20723
norm. coverage 6 77.20723 38.42335 278.26342
CTCF 6 278.26342 38.42335 318.47466
Slot "tags":
$title
Axis norm. coverage CTCF
"Axis" "norm. coverage" "CTCF"
[[4]]
[[4]]$Axis
Genome axis 'Axis'
[[4]]$`norm. coverage`
DataTrack 'norm. coverage'
| genome: NA
| active chromosome: chr8
| positions: 4999
| samples:2
| strand: *
[[4]]$CTCF
AnnotationTrack 'CTCF'
| genome: NA
| active chromosome: chr8
| annotation features: 1
[[4]]$titles
An object of class "ImageMap"
Slot "coords":
x1 y1 x2 y2
Axis 6 37.24747 38.42335 77.20723
norm. coverage 6 77.20723 38.42335 278.26342
CTCF 6 278.26342 38.42335 318.47466
Slot "tags":
$title
Axis norm. coverage CTCF
"Axis" "norm. coverage" "CTCF"
[[5]]
[[5]]$Axis
Genome axis 'Axis'
[[5]]$`norm. coverage`
DataTrack 'norm. coverage'
| genome: NA
| active chromosome: chr7
| positions: 4999
| samples:2
| strand: *
[[5]]$CTCF
AnnotationTrack 'CTCF'
| genome: NA
| active chromosome: chr7
| annotation features: 2
[[5]]$titles
An object of class "ImageMap"
Slot "coords":
x1 y1 x2 y2
Axis 6 37.24747 38.42335 77.20723
norm. coverage 6 77.20723 38.42335 278.26342
CTCF 6 278.26342 38.42335 318.47466
Slot "tags":
$title
Axis norm. coverage CTCF
"Axis" "norm. coverage" "CTCF"
[[6]]
[[6]]$Axis
Genome axis 'Axis'
[[6]]$`norm. coverage`
DataTrack 'norm. coverage'
| genome: NA
| active chromosome: chr18
| positions: 4999
| samples:2
| strand: *
[[6]]$CTCF
AnnotationTrack 'CTCF'
| genome: NA
| active chromosome: chr18
| annotation features: 1
[[6]]$titles
An object of class "ImageMap"
Slot "coords":
x1 y1 x2 y2
Axis 6 37.24747 38.42335 77.20723
norm. coverage 6 77.20723 38.42335 278.26342
CTCF 6 278.26342 38.42335 318.47466
Slot "tags":
$title
Axis norm. coverage CTCF
"Axis" "norm. coverage" "CTCF"
[[7]]
[[7]]$Axis
Genome axis 'Axis'
[[7]]$`norm. coverage`
DataTrack 'norm. coverage'
| genome: NA
| active chromosome: chr6
| positions: 4999
| samples:2
| strand: *
[[7]]$CTCF
AnnotationTrack 'CTCF'
| genome: NA
| active chromosome: chr6
| annotation features: 1
[[7]]$titles
An object of class "ImageMap"
Slot "coords":
x1 y1 x2 y2
Axis 6 37.24747 38.42335 77.20723
norm. coverage 6 77.20723 38.42335 278.26342
CTCF 6 278.26342 38.42335 318.47466
Slot "tags":
$title
Axis norm. coverage CTCF
"Axis" "norm. coverage" "CTCF"
[[8]]
[[8]]$Axis
Genome axis 'Axis'
[[8]]$`norm. coverage`
DataTrack 'norm. coverage'
| genome: NA
| active chromosome: chr2
| positions: 5000
| samples:2
| strand: *
[[8]]$CTCF
AnnotationTrack 'CTCF'
| genome: NA
| active chromosome: chr2
| annotation features: 1
[[8]]$titles
An object of class "ImageMap"
Slot "coords":
x1 y1 x2 y2
Axis 6 37.24747 38.42335 77.20723
norm. coverage 6 77.20723 38.42335 278.26342
CTCF 6 278.26342 38.42335 318.47466
Slot "tags":
$title
Axis norm. coverage CTCF
"Axis" "norm. coverage" "CTCF"
[[9]]
[[9]]$Axis
Genome axis 'Axis'
[[9]]$`norm. coverage`
DataTrack 'norm. coverage'
| genome: NA
| active chromosome: chr4
| positions: 4999
| samples:2
| strand: *
[[9]]$CTCF
AnnotationTrack 'CTCF'
| genome: NA
| active chromosome: chr4
| annotation features: 1
[[9]]$titles
An object of class "ImageMap"
Slot "coords":
x1 y1 x2 y2
Axis 6 37.24747 38.42335 77.20723
norm. coverage 6 77.20723 38.42335 278.26342
CTCF 6 278.26342 38.42335 318.47466
Slot "tags":
$title
Axis norm. coverage CTCF
"Axis" "norm. coverage" "CTCF"
[[10]]
[[10]]$Axis
Genome axis 'Axis'
[[10]]$`norm. coverage`
DataTrack 'norm. coverage'
| genome: NA
| active chromosome: chr1
| positions: 5000
| samples:2
| strand: *
[[10]]$CTCF
AnnotationTrack 'CTCF'
| genome: NA
| active chromosome: chr1
| annotation features: 1
[[10]]$titles
An object of class "ImageMap"
Slot "coords":
x1 y1 x2 y2
Axis 6 37.24747 38.42335 77.20723
norm. coverage 6 77.20723 38.42335 278.26342
CTCF 6 278.26342 38.42335 318.47466
Slot "tags":
$title
Axis norm. coverage CTCF
"Axis" "norm. coverage" "CTCF"
# Load the CTCF profile of the 'highcov.random.42' artificial-coverage file.
even.prof <- load.profile('~/mnt/edann/hexamers/strand_specific/artificial_coverage/highcov.random.42.artCov.CTCF.profile.txt')
Read 600 items
# Load the CTCF profile computed with the best matrix of run test9 (pop30, its200).
ctcf.prof <- load.profile('~/mnt/edann/hexamers/strand_specific/artificial_coverage/highcov.random.42test9_ctcf_kmersFC_pop30_its200.bestMat.coverage.artCov.CTCF.profile.txt')
Read 600 items
# Combine the two profiles into one data frame (labelled 'CTCF' and 'even')
# and plot them together.
df <- make.df.of.profiles(list(CTCF = ctcf.prof, even = even.prof))
plot.refpoint.profile.df(df)
[1] 0 300 600
# Column layout shared by both matrices: six annotation columns followed by
# 600 value columns named "1" to "600".
cov.mat.colnames <- c('chr', 'start', 'end', 'id', 'len', 'strand', seq(1, 600))

# Read both gzipped matrices, skipping their first line.
tab <- read.table(gzfile('~/mnt/edann/hexamers/strand_specific/artificial_coverage/highcov.random.42test9_ctcf_kmersFC_pop30_its200.bestMat.coverage.artCov.CTCF.mat.gz'), skip = 1)
colnames(tab) <- cov.mat.colnames
tab.even <- read.table(gzfile('~/mnt/edann/hexamers/strand_specific/artificial_coverage/highcov.random.42.artCov.mat.gz'), skip = 1)
colnames(tab.even) <- cov.mat.colnames

# Plot the average per-region z-score at every position of a coverage matrix.
#
# cov.tab: data frame with the columns named in cov.mat.colnames.
# Returns a ggplot line plot of mean z-score against position.
plot.avg.zscore.profile <- function(cov.tab) {
  cov.tab %>%
    select(-c(chr, start, end, len, strand)) %>%
    melt(id.vars = c('id'), variable.name = 'position', value.name = 'coverage') %>%
    # z-score the coverage within each region (id) so regions are comparable
    group_by(id) %>%
    mutate(zscore = (coverage - mean(coverage, na.rm = TRUE)) / sd(coverage, na.rm = TRUE)) %>%
    ungroup() %>%
    group_by(position) %>%
    summarise(avg = mean(zscore, na.rm = TRUE)) %>%
    # `position` is a factor whose levels follow the column order, so its
    # integer codes are the positions 1..600
    ggplot(., aes(as.numeric(position), avg)) +
    geom_line()
}

plot.avg.zscore.profile(tab)
plot.avg.zscore.profile(tab.even)
# Switch to the even-coverage optimization directory and collect the rho score
# files of the 100- and 200-iteration runs (population size 30). The pattern is
# a regular expression, so the dot of the extension is escaped and the match is
# anchored to the end of the file name.
opt.dir <- '~/mnt/edann/hexamers/DEoptimization/even_cov/'
score.even.files.100 <- list.files(opt.dir, pattern = '.+even_pop30_its100.+rho\\.txt$', full.names = TRUE)
score.even.files.200 <- list.files(opt.dir, pattern = '.+even_pop30_its200.+rho\\.txt$', full.names = TRUE)
# One score plot per iteration count, legend title removed.
plot.optimization.score(score.even.files.100) +
  theme(legend.title = element_blank())
plot.optimization.score(score.even.files.200) +
  theme(legend.title = element_blank())
# Starting populations of the run labelled 'good' (test5) and the run labelled
# 'bad' (test1).
original.pop.good <- read.csv(paste0(opt.dir, 'test5_even_pop30_its100.DE.originalPop.csv'), row.names = 1)
original.pop.bad <- read.csv(paste0(opt.dir, 'test1_even_pop30_its100.DE.originalPop.csv'), row.names = 1)

# Correlations between every pair of distinct rows of `pop`.
# Only the upper triangle of the correlation matrix is kept: the diagonal holds
# the self-correlations (always 1) and the lower triangle repeats every pair,
# both of which would distort the distribution shown in the boxplot.
pairwise.cor <- function(pop) {
  cor.mat <- cor(t(pop))
  cor.mat[upper.tri(cor.mat)]
}

boxplot(pairwise.cor(original.pop.bad), pairwise.cor(original.pop.good),
        ylab = 'correlation between vectors', names = c('bad', 'good'))
Doesn’t seem to play a major role.
Visualizing in another way
# Starting populations of all 200-iteration even-coverage runs. The dot of the
# extension is escaped and the pattern anchored to the end of the file name.
originalpop.even.files <- list.files(opt.dir, pattern = '.+even_pop30_its200.+originalPop\\.csv$', full.names = TRUE)
# Reshape every population to long format and tag it with its run name: the
# file name without directory and without the '.DE.<...>' suffix. basename()
# and an escaped, anchored pattern replace the loose '.+/|.DE.+' regex, whose
# unescaped dots could also match inside a run name.
pops <- lapply(originalpop.even.files, function(x) {
  read.csv(x, row.names = 1) %>%
    reshape.prob.mat %>%
    mutate(p = paste0(nuc, '.', pos), name = sub('\\.DE\\..+$', '', basename(x)))
})
mean.pops <- do.call(rbind, pops)
# Average probability per nucleotide, one line per run.
mean.pops %>%
  group_by(name, nuc) %>%
  summarise(avg = mean(prob)) %>%
  ggplot(., aes(nuc, avg, group = name, color = name)) +
  # facet_grid(name~.) +
  geom_line() +
  # ylim(0,1) +
  xlab('nucleotide')
This is interesting: fewer Cs in the ones that end up having a higher score
# Coverage track plot for the second element of l.comon.bw.
plot.expVSpred.coverage.track(l.comon.bw[[2]])
The following non-numeric data column has been dropped: id
[[1]]
Genome axis 'Axis'
[[2]]
DataTrack 'norm. coverage'
| genome: NA
| active chromosome: chr1
| positions: 4999
| samples:2
| strand: *
# Draw 10 randomly sampled elements of clean.common.bw with nice.plotTrack(),
# passing the two track labels. No seed is set in this call, so the selection
# differs between runs.
lapply(sample(clean.common.bw,10), nice.plotTrack, labels=c('Even primer conc', 'Optimized primer conc'))
$`chr1 (5.7585e+07,5.7615e+07]`
$`chr10 (5.836e+07,5.837e+07]`
$`chr11 (2.6855e+07,2.686e+07]`
$`chr1 (9.763e+07,9.766e+07]`
$`chr10 (3.936e+07,3.938e+07]`
$`chr1 (5.5295e+07,5.5315e+07]`
$`chr1 (4.1015e+07,4.1045e+07]`
$`chr1 (1.82945e+08,1.82965e+08]`
$`chr10 (5.4115e+07,5.415e+07]`
$`chr10 (4.4135e+07,4.4185e+07]`
Looks good, though noisier than the non-optimized data.
Profile
# Save to PDF. No `plot` argument is given, so ggsave() writes the most recently
# displayed ggplot at the default device size.
# NOTE(review): confirm that the last plot shown is the intended profile figure.
ggsave('~/AvOwork/output/DE_optimization/even_optimization.pdf')
Saving 7 x 7 in image